# Copyright (c) Alibaba, Inc. and its affiliates.
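"""Examples of RAG test-set generation and evaluation with EvalScope's
RAGEval backend (RAGAS tool).

Each dict below is a complete config for ``run_task``; pick the task you
need at the bottom of ``run_eval`` and run this file directly.
"""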

from evalscope.run import run_task
from evalscope.utils.logger import get_logger

logger = get_logger()


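# Model endpoints for OpenAI-compatible API servers. "EMPTY" is the usual
# placeholder api_key for locally hosted servers (e.g. vLLM); swap in real
# api_base/api_key values when targeting a hosted service.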
gpt_4_o = {
    "model_name": "gpt-4o",
    "api_base": "http://localhost:8088/v1",
    "api_key": "EMPTY",
}

qwen2 = {
    "model_name": "qwen2.5",
    "api_base": "http://localhost:8000/v1",
    "api_key": "EMPTY",
}


def run_eval():
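    # Test-set generation from local docs. "distribution" sets the mix of
    # question types RAGAS synthesizes (the fractions should sum to 1), and
    # "generator_llm" here is a local ModelScope checkpoint rather than an
    # API endpoint.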
    generate_testset_task_cfg = {
        "eval_backend": "RAGEval",
        "eval_config": {
            "tool": "RAGAS",
            "testset_generation": {
                "docs": ["README.md"],
                "test_size": 10,
                "output_file": "outputs/testset_chinese.json",  # json file
                "distribution": {"simple": 0.3, "multi_context": 0.3, "reasoning": 0.4},
                "generator_llm": {
                    "model_name_or_path": "qwen/Qwen2-7B-Instruct",
                    "template_type": "qwen",
                },
                "embeddings": {
                    "model_name_or_path": "AI-ModelScope/m3e-base",
                },
            },
        },
    }

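    # The same generation flow for Chinese documents: an API-served generator
    # (gpt_4_o above), a Chinese embedding model, and an explicit "language"
    # hint for the synthesized questions.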
    generate_zh_cfg = {
        "eval_backend": "RAGEval",
        "eval_config": {
            "tool": "RAGAS",
            "testset_generation": {
                "docs": ["test_zh.md"],
                "test_size": 10,
                "output_file": "outputs/testset_chinese.json",  # json file
                "distribution": {"simple": 0.3, "multi_context": 0.3, "reasoning": 0.4},
                "generator_llm": gpt_4_o,
                "embeddings": {
                    "model_name_or_path": "AI-ModelScope/bge-large-zh",
                },
                "language": "chinese",
            },
        },
    }

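    # Evaluation takes a testset whose samples already contain answers from
    # the system under test (hence *_with_answer.json; see fill_answers at
    # the bottom of this file). The critic LLM judges each sample on the
    # listed RAGAS metrics.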
    eval_task_cfg = {
        "eval_backend": "RAGEval",
        "eval_config": {
            "tool": "RAGAS",
            "eval": {
                "testset_file": "outputs/testset_chinese_with_answer.json",
                "critic_llm": qwen2,
                "embeddings": {
                    "model_name_or_path": "AI-ModelScope/bge-large-zh",
                },
                "metrics": [
                    "Faithfulness",
                    "AnswerRelevancy",
                    "ContextPrecision",
                    "AnswerCorrectness",
                ],
                "language": "chinese",
            },
        },
    }

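    # Multi-modal variant of the evaluation task: samples can include images,
    # scored with RAGAS's multi-modal metrics.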
    multi_modal_eval_task_cfg = {
        "eval_backend": "RAGEval",
        "eval_config": {
            "tool": "RAGAS",
            "eval": {
                "testset_file": "outputs/testset_multi_modal.json",
                "critic_llm": gpt_4_o,
                "embeddings": {
                    "model_name_or_path": "AI-ModelScope/bge-large-zh",
                },
                "metrics": [
                    "MultiModalFaithfulness",
                    "MultiModalRelevance",
                ],
                "language": "chinese",
            },
        },
    }

    # Run one task at a time; uncomment the config you want to execute.
    # run_task(task_cfg=generate_zh_cfg)
    # run_task(task_cfg=eval_task_cfg)
    # run_task(task_cfg=multi_modal_eval_task_cfg)
    run_task(task_cfg=generate_testset_task_cfg)

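
# The evaluation configs above expect a testset whose samples already carry
# an "answer" produced by the system under test. A minimal sketch of that
# intermediate step, assuming the generated testset is a JSON array with a
# "question" field per sample (field names can vary across RAGAS versions),
# and taking your RAG pipeline as a plain callable:
def fill_answers(testset_path: str, output_path: str, answer_fn) -> None:
    import json

    with open(testset_path, encoding="utf-8") as f:
        samples = json.load(f)
    for sample in samples:
        # answer_fn is your own RAG pipeline: question text in, answer text out.
        sample["answer"] = answer_fn(sample["question"])
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(samples, f, ensure_ascii=False, indent=2)
    logger.info(f"Wrote {len(samples)} answered samples to {output_path}")
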

if __name__ == "__main__":
    run_eval()
